library(ggplot2)
  library(reshape2)
  library(dplyr)
  library(tidyr)
  library(GGally)
  library(grid)
  library(calibrate)
  library(knitr)
  "%&%" = function(a,b) paste(a,b,sep="")
  se <- function(x) sqrt(var(x,na.rm=TRUE)/length(is.na(x)==FALSE))
  source('/Volumes/im-lab/nas40t2/hwheeler/PrediXcan_CV/GTEx_2014-06013_release/transfers/PrediXmod/Paper_plots/multiplot.R')
  my.dir <- '/Volumes/im-lab/nas40t2/hwheeler/cross-tissue/'
  fig.dir <- '~/GitHub/GenArch/GenArchPaper/Figures/'
  rna.dir <- my.dir %&% "gtex-rnaseq/"
  annot.dir <- my.dir %&% "gtex-annot/"
  out.dir <- rna.dir %&% "ind-tissues-RPKM/"
##read in SE calculated from 100 perms - Price et al. method
setable <- read.table(my.dir %&% "SE_estimate_from_h2_localonly_reml-no-constrain_allgenes_100perms.txt",header=T)
dgn <- read.table(my.dir %&% 'expArch_DGN-WB_imputedGTs/DGN-WB.h2.all.models_FHSfdr0.05.Chr1-22_globalAll_reml-no-constrain.2015-12-15.txt',header=T)
tislist <- scan(my.dir %&% 'gtex-rnaseq/ind-tissues-from-nick/GTEx_PrediXmod.tissue.list',sep="\n",what="character")
tisspacelist <- scan(my.dir %&% '40.tissue.list',sep="\n",what="character")
table1 <- matrix(0,nrow=length(tislist)+2,ncol=6)
n <- dgn$N[1]
numexpgenes <- dim(dgn)[1]
#numexpgenes <- length(dgn$local.p[is.na(dgn$local.p)==FALSE]) 
meanh2 <- sprintf("%.3f",mean(dgn$local.h2,na.rm=TRUE))
seperm <- setable$se[1]
meanandse <- meanh2 %&% " (" %&% seperm %&% ")"
pest <-  dgn %>% mutate(pchi=pchisq((local.h2/local.se)^2, df=1, lower.tail=FALSE)) %>% mutate(local.q=p.adjust(pchi,method="BH"))  %>% arrange(local.h2) %>% mutate(Qlt05=local.q<0.1)
propsig <- sprintf("%.1f",table(pest$Qlt05)[2]/sum(table(pest$Qlt05,useNA="i"))*100)
numsig <- table(pest$Qlt05)[2]
table1[1,] <- c("DGN Whole Blood",n,meanandse,propsig,numsig,numexpgenes)
hist(pest$local.p,main="DGN")

hist(pest$pchi,main="DGN")

#add cross-tissue to table 1
ct <- read.table(my.dir %&% 'cross-tissue.h2.all.models_FHSfdr0.05.Chr1-22_globaleQTLOtherChr_reml-no-constrain.2015-12-14.txt',header=T)
n <- ct$N[1]
numexpgenes<-dim(ct)[1]
numexpgenes <- length(ct$local.p[is.na(ct$local.p)==FALSE]) 
meanh2 <- sprintf("%.3f",mean(ct$local.h2,na.rm=TRUE))
seperm <- setable$se[2]
meanandse <- meanh2 %&% " (" %&% seperm %&% ")"
pest <-  ct %>% mutate(pchi=pchisq((local.h2/local.se)^2, df=1, lower.tail=FALSE)) %>% mutate(local.q=p.adjust(pchi,method="BH")) %>% arrange(local.h2) %>% mutate(Qlt05=local.q<0.1)
propsig <- sprintf("%.1f",table(pest$Qlt05)[2]/sum(table(pest$Qlt05,useNA="n"))*100)
numsig <- table(pest$Qlt05)[2]
table1[2,] <- c("Cross-tissue",n,meanandse,propsig,numsig,numexpgenes)
hist(pest$local.p,main="CT")

hist(pest$pchi,main="CT")

##calc mean global for DGN
signif(mean(dgn$loc.jt.h2,na.rm=TRUE),3)
## [1] 0.143
signif(se(dgn$loc.jt.h2),3)
## [1] 0.00153
signif(mean(dgn$glo.jt.h2,na.rm=TRUE),3)
## [1] 0.034
signif(se(dgn$glo.jt.h2),3)
## [1] 0.00239
pest <-  dgn %>% mutate(glo.jt.P=pchisq((glo.jt.h2/glo.jt.se)^2, df=1, lower.tail=FALSE)) %>%  mutate(glo.jt.Q=p.adjust(glo.jt.P,method="BH"))   %>% arrange(glo.jt.h2) %>% mutate(Qlt05=glo.jt.Q<0.1) 
propsig <- table(pest$Qlt05)[2]/sum(table(pest$Qlt05,useNA="i"))*100
propsig
##     TRUE 
## 3.931127
table(pest$Qlt05)
## 
## FALSE  TRUE 
##  9692   500
##prop loc
signif(mean(dgn$loc.jt.h2,na.rm=TRUE),3)/(signif(mean(dgn$loc.jt.h2,na.rm=TRUE),3)+signif(mean(dgn$glo.jt.h2,na.rm=TRUE),3))
## [1] 0.8079096
for(i in 1:length(tislist)){
  tis <- tislist[i]
  data <- read.table(my.dir %&% 'GTEx.TW.' %&% tis  %&% '.h2.all.models_FHSfdr0.05.Chr1-22_globaleQTLOtherChr_reml-no-constrain.2015-12-14.txt',header=T,sep="\t")  
  explist <- scan(out.dir %&% tis %&% ".meanRPKMgt0.1_3samplesRPKMgt0_genelist","c")
  data <- dplyr::filter(data,ensid %in% explist)
  n <- data$N[1]
  numexpgenes <- dim(data)[1]
  #numexpgenes <- length(data$local.p[is.na(data$local.p)==FALSE]) ##num expressed genes mean(RPKM)>0.1
  meanh2 <- sprintf("%.3f",mean(data$local.h2,na.rm=TRUE))
  seperm <- setable$se[i+2]
  pest <-  data %>% mutate(pchi=pchisq((local.h2/local.se)^2, df=1, lower.tail=FALSE)) %>% mutate(local.q=p.adjust(pchi,method="BH")) %>% arrange(local.h2) %>% mutate(Qlt05=local.q<0.1) 
  propsig <- sprintf("%.1f",table(pest$Qlt05)[2]/sum(table(pest$Qlt05,useNA="i"))*100)
  numsig <- table(pest$Qlt05)[2]
  tisspace <- tisspacelist[i]
  meanandse <- meanh2 %&% " (" %&% seperm %&% ")"
  tableinfo <- c(tisspace,n,meanandse,propsig,numsig,numexpgenes)
  table1[i+2,] <- tableinfo
  hist(pest$local.p,main=tis)
  hist(pest$pchi,main=tis)
}

colnames(table1)=c("tissue","n","mean h2 (SE)","% FDR<0.1","num FDR<0.1","num expressed")
#table1

library(xtable)
## Warning: package 'xtable' was built under R version 3.2.3
tab <- xtable(table1)
print(tab, type="latex",include.rownames=FALSE)
## % latex table generated in R 3.2.1 by xtable 1.8-2 package
## % Tue Aug 16 11:00:12 2016
## \begin{table}[ht]
## \centering
## \begin{tabular}{llllll}
##   \hline
## tissue & n & mean h2 (SE) & \% FDR$<$0.1 & num FDR$<$0.1 & num expressed \\ 
##   \hline
## DGN Whole Blood & 922 & 0.149 (0.0039) & 58.8 & 7474 & 12719 \\ 
##   Cross-tissue & 450 & 0.062 (0.017) & 20.2 & 2995 & 14861 \\ 
##   Adipose - Subcutaneous & 298 & 0.038 (0.028) & 18.8 & 2666 & 14205 \\ 
##   Adrenal Gland & 126 & 0.043 (0.018) & 17.5 & 2479 & 14150 \\ 
##   Artery - Aorta & 198 & 0.042 (0.024) & 17.2 & 2385 & 13844 \\ 
##   Artery - Coronary & 119 & 0.037 (0.048) & 16.5 & 2326 & 14127 \\ 
##   Artery - Tibial & 285 & 0.042 (0.02) & 18.4 & 2489 & 13504 \\ 
##   Brain - Anterior cingulate cortex (BA24) & 72 & 0.028 (0.036) & 15.4 & 2235 & 14515 \\ 
##   Brain - Caudate (basal ganglia) & 100 & 0.037 (0.047) & 17.2 & 2521 & 14632 \\ 
##   Brain - Cerebellar Hemisphere & 89 & 0.049 (0.068) & 16.0 & 2294 & 14295 \\ 
##   Brain - Cerebellum & 103 & 0.050 (0.06) & 18.3 & 2646 & 14491 \\ 
##   Brain - Cortex & 96 & 0.045 (0.057) & 17.7 & 2593 & 14689 \\ 
##   Brain - Frontal Cortex (BA9) & 92 & 0.038 (0.077) & 16.6 & 2416 & 14554 \\ 
##   Brain - Hippocampus & 81 & 0.037 (0.05) & 16.4 & 2376 & 14513 \\ 
##   Brain - Hypothalamus & 81 & 0.017 (0.043) & 15.2 & 2238 & 14759 \\ 
##   Brain - Nucleus accumbens (basal ganglia) & 93 & 0.029 (0.04) & 17.1 & 2498 & 14601 \\ 
##   Brain - Putamen (basal ganglia) & 82 & 0.032 (0.064) & 17.3 & 2495 & 14404 \\ 
##   Breast - Mammary Tissue & 183 & 0.029 (0.037) & 17.0 & 2496 & 14700 \\ 
##   Cells - EBV-transformed lymphocytes & 115 & 0.058 (0.1) & 16.6 & 2066 & 12454 \\ 
##   Cells - Transformed fibroblasts & 272 & 0.051 (0.031) & 20.0 & 2552 & 12756 \\ 
##   Colon - Sigmoid & 124 & 0.033 (0.019) & 16.0 & 2298 & 14321 \\ 
##   Colon - Transverse & 170 & 0.036 (0.058) & 16.3 & 2398 & 14676 \\ 
##   Esophagus - Gastroesophageal Junction & 127 & 0.032 (0.039) & 16.1 & 2270 & 14125 \\ 
##   Esophagus - Mucosa & 242 & 0.042 (0.03) & 17.1 & 2432 & 14239 \\ 
##   Esophagus - Muscularis & 219 & 0.039 (0.015) & 17.7 & 2493 & 14047 \\ 
##   Heart - Atrial Appendage & 159 & 0.042 (0.03) & 16.8 & 2327 & 13892 \\ 
##   Heart - Left Ventricle & 190 & 0.034 (0.034) & 16.5 & 2203 & 13321 \\ 
##   Liver & 98 & 0.033 (0.062) & 16.5 & 2230 & 13553 \\ 
##   Lung & 279 & 0.032 (0.034) & 17.0 & 2509 & 14775 \\ 
##   Muscle - Skeletal & 361 & 0.033 (0.03) & 17.9 & 2301 & 12833 \\ 
##   Nerve - Tibial & 256 & 0.052 (0.087) & 20.6 & 2992 & 14510 \\ 
##   Ovary & 85 & 0.037 (0.094) & 17.2 & 2418 & 14094 \\ 
##   Pancreas & 150 & 0.047 (0.024) & 17.2 & 2398 & 13941 \\ 
##   Pituitary & 87 & 0.038 (0.055) & 16.6 & 2527 & 15183 \\ 
##   Skin - Not Sun Exposed (Suprapubic) & 196 & 0.041 (0.034) & 17.0 & 2490 & 14642 \\ 
##   Skin - Sun Exposed (Lower leg) & 303 & 0.039 (0.016) & 18.4 & 2687 & 14625 \\ 
##   Small Intestine - Terminal Ileum & 77 & 0.036 (0.053) & 17.4 & 2591 & 14860 \\ 
##   Spleen & 89 & 0.059 (0.061) & 17.0 & 2451 & 14449 \\ 
##   Stomach & 171 & 0.032 (0.025) & 16.1 & 2338 & 14531 \\ 
##   Testis & 157 & 0.054 (0.044) & 17.8 & 3009 & 16936 \\ 
##   Thyroid & 279 & 0.044 (0.066) & 19.4 & 2838 & 14642 \\ 
##   Whole Blood & 339 & 0.033 (0.023) & 18.6 & 2260 & 12160 \\ 
##    \hline
## \end{tabular}
## \end{table}
print(kable(tab))
## 
## 
## tissue                                      n     mean h2 (SE)     % FDR<0.1   num FDR<0.1   num expressed 
## ------------------------------------------  ----  ---------------  ----------  ------------  --------------
## DGN Whole Blood                             922   0.149 (0.0039)   58.8        7474          12719         
## Cross-tissue                                450   0.062 (0.017)    20.2        2995          14861         
## Adipose - Subcutaneous                      298   0.038 (0.028)    18.8        2666          14205         
## Adrenal Gland                               126   0.043 (0.018)    17.5        2479          14150         
## Artery - Aorta                              198   0.042 (0.024)    17.2        2385          13844         
## Artery - Coronary                           119   0.037 (0.048)    16.5        2326          14127         
## Artery - Tibial                             285   0.042 (0.02)     18.4        2489          13504         
## Brain - Anterior cingulate cortex (BA24)    72    0.028 (0.036)    15.4        2235          14515         
## Brain - Caudate (basal ganglia)             100   0.037 (0.047)    17.2        2521          14632         
## Brain - Cerebellar Hemisphere               89    0.049 (0.068)    16.0        2294          14295         
## Brain - Cerebellum                          103   0.050 (0.06)     18.3        2646          14491         
## Brain - Cortex                              96    0.045 (0.057)    17.7        2593          14689         
## Brain - Frontal Cortex (BA9)                92    0.038 (0.077)    16.6        2416          14554         
## Brain - Hippocampus                         81    0.037 (0.05)     16.4        2376          14513         
## Brain - Hypothalamus                        81    0.017 (0.043)    15.2        2238          14759         
## Brain - Nucleus accumbens (basal ganglia)   93    0.029 (0.04)     17.1        2498          14601         
## Brain - Putamen (basal ganglia)             82    0.032 (0.064)    17.3        2495          14404         
## Breast - Mammary Tissue                     183   0.029 (0.037)    17.0        2496          14700         
## Cells - EBV-transformed lymphocytes         115   0.058 (0.1)      16.6        2066          12454         
## Cells - Transformed fibroblasts             272   0.051 (0.031)    20.0        2552          12756         
## Colon - Sigmoid                             124   0.033 (0.019)    16.0        2298          14321         
## Colon - Transverse                          170   0.036 (0.058)    16.3        2398          14676         
## Esophagus - Gastroesophageal Junction       127   0.032 (0.039)    16.1        2270          14125         
## Esophagus - Mucosa                          242   0.042 (0.03)     17.1        2432          14239         
## Esophagus - Muscularis                      219   0.039 (0.015)    17.7        2493          14047         
## Heart - Atrial Appendage                    159   0.042 (0.03)     16.8        2327          13892         
## Heart - Left Ventricle                      190   0.034 (0.034)    16.5        2203          13321         
## Liver                                       98    0.033 (0.062)    16.5        2230          13553         
## Lung                                        279   0.032 (0.034)    17.0        2509          14775         
## Muscle - Skeletal                           361   0.033 (0.03)     17.9        2301          12833         
## Nerve - Tibial                              256   0.052 (0.087)    20.6        2992          14510         
## Ovary                                       85    0.037 (0.094)    17.2        2418          14094         
## Pancreas                                    150   0.047 (0.024)    17.2        2398          13941         
## Pituitary                                   87    0.038 (0.055)    16.6        2527          15183         
## Skin - Not Sun Exposed (Suprapubic)         196   0.041 (0.034)    17.0        2490          14642         
## Skin - Sun Exposed (Lower leg)              303   0.039 (0.016)    18.4        2687          14625         
## Small Intestine - Terminal Ileum            77    0.036 (0.053)    17.4        2591          14860         
## Spleen                                      89    0.059 (0.061)    17.0        2451          14449         
## Stomach                                     171   0.032 (0.025)    16.1        2338          14531         
## Testis                                      157   0.054 (0.044)    17.8        3009          16936         
## Thyroid                                     279   0.044 (0.066)    19.4        2838          14642         
## Whole Blood                                 339   0.033 (0.023)    18.6        2260          12160

Calculate num expressed genes and make lists per tissue for filtering

tislist <- scan(my.dir %&% 'tissue.list',sep="\n",what="character")

expidlist <- scan(rna.dir %&% "GTEx_Analysis_2014-06-13.RNA-seq.ID.list","character")
expgenelist <- scan(rna.dir %&% "GTEx_Analysis_2014-06-13.RNA-seq.GENE.list","character")
exp <- readRDS(rna.dir %&% "GTEx_Analysis_2014-06-13.RNA-seq.GENExID.RDS")
expdata <- matrix(exp, ncol=length(expidlist), byrow=T)
t.expdata <- t(expdata)
rownames(t.expdata) <- expidlist
colnames(t.expdata) <- expgenelist

gencodefile <- annot.dir %&% "gencode.v18.genes.patched_contigs.summary.protein"
gencode <- read.table(gencodefile)
rownames(gencode) <- gencode[,5]
t.expdata <- t.expdata[,intersect(colnames(t.expdata),rownames(gencode))] ###pull protein coding gene expression data

sam <- read.table(annot.dir %&% "GTEx_Analysis_2014-06-13.SampleTissue.annot",header=T,sep="\t")

for(i in 1:length(tislist)){
  tissue <- tislist[i]
  tis<- gsub(' ','',tissue) ##removes all whitespace to match .RDS files
  sample <- subset(sam,SMTSD == tissue) ### pull sample list of chosen tissue
  tissue.exp <- t.expdata[intersect(rownames(t.expdata),sample$SAMPID),] ###pull expression data for chosen tissue###
  tissue.exp <- t(tissue.exp) #for merging in R
  
  explist <- subset(rowMeans(tissue.exp), rowMeans(tissue.exp)>0.1) ###pull genes with mean expression > 0.1###
  explist <- names(explist)
  nz.expdata <- tissue.exp[explist,]

  #calc 10% of sample size
  tenpercent <- round(0.1*dim(nz.expdata)[2])
  
  rowtable<-function(x) table(x>0)[[1]] > 2 ##function to determine if >2 samples have exp levels >0
  nonbin<-apply(nz.expdata,1,rowtable) ##apply to matrix
  gt2.expdata <- nz.expdata[nonbin,] ##remove genes with >10% of RPKM's==0
  
  write.table(rownames(gt2.expdata),file=out.dir %&% tis %&% ".meanRPKMgt0.1_3samplesRPKMgt0_genelist",quote=FALSE,col.names = FALSE,row.names=FALSE)
  cat(tis,":",dim(gt2.expdata)[1],"genes\n")
}